home *** CD-ROM | disk | FTP | other *** search
Text File | 1994-02-18 | 27.4 KB | 1,163 lines |
- ;
- ; Flick FLI-format Animation Viewer v1.2 19 Feb 1994
- ; --------------------------------------
- ;
- ;
- ;This program plays FLI/FLC-format bitmapped animation files on any ECS
- ;or AGA Amiga running OS2.04 or higher. FLI/FLC-format files are
- ;produced by Autodesk Animator and Autodesk 3D Studio on a PC, as well
- ;as by other programs.
- ;
- ;The files in this archive may be distributed anywhere provided they are
- ;unmodified and are not sold for profit.
- ;
- ;Ownership and copyright of all files remains with the author:
- ;
- ; Peter McGavin, 86 Totara Crescent, Lower Hutt, New Zealand.
- ; e-mail: peterm@maths.grace.cri.nz
- ;
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ; xdef _chunky2planar
-
- ; peterm/adaptive7.s
- ; Combines peterm/chunky4.s and jmccoull/blitter4pass.s
- ; The blitter works on the top portion of the display at the same time as
- ; the CPU converts the bottom portion.
- ; The blitter has completely finished before the routine returns.
- ; Both parts of every call are timed using the EClock.
- ; The partition point is recalculated at the end of the call in an attempt
- ; to keep the two routines taking about the same amount of time.
- ;
- ; The following formula is used:
- ;
- ; n_blit = n * t_cpu * n_blit / (t_blit * n_cpu + t_cpu * n_blit)
- ;
- ; where:
- ; n is the total number of 32-byte units (i.e, width*height/32)
- ; n_blit is the number of 32-byte units above the partition
- ; n_cpu is the number of 32-byte units below the partition (=n-n_blit)
- ; t_blit is the time taken by the blitter in EClock units
- ; t_cpu is the time taken by the cpu in EClock units
- ;
- ; ECS Agnus required (for long blits)
- ; KS2.0 required (for utility.library and EClock)
-
- ;-----------------------------------------------------------------------------
- ; Set Macro68 defaults
-
- ; Assembler-wide defaults (Macro68 "default" directives).
- default _branch,_word
- default _adrbasedisp,_word
- default _pcbasedisp,_word
- default _outerdisp,_word
- default _absolute,_abs
-
- ; Blitter register offsets. The QBlit functions below apply them to a0.
- ; NOTE(review): other names used later in this file (bltcon0, bltafwm,
- ; bltbmod, bltdmod, bltcdat, MEMF_CHIP, _LVOTypeOfMem, _LVOCopyMemQuick,
- ; _LVOQBlit, _LVOWaitBlit) are not defined here and must come from
- ; elsewhere -- confirm how they are supplied.
- bltcpt equ $048
- bltbpt equ $04c
- bltapt equ $050
- bltdpt equ $054
- bltsizv equ $05c
- bltsizh equ $05e
- ; Value stored in the "stat" byte of mybltnode (see the data section).
- cleanup equ $40
- ; Library vector offsets, used as jsr (_LVOxxx,a6).
- _LVOReadEClock equ -60
- _LVOCacheClearU equ -636
- _LVOUMult32 equ -144
- _LVOUDivMod32 equ -156
-
- ;-----------------------------------------------------------------------------
- ; chunky2planar: (new Motorola syntax)
- ; a0 -> chunky pixels (in FAST RAM)
- ; a1 -> plane0 (assume other 7 planes are allocated contiguously)
- ; a3 -> tmp chip buffer0, size=width*height
- ; a4 -> tmp chip buffer1, size=width*height
- ; a5 = TimerBase
- ; a6 = GfxBase
- ; d0 = UtilityBase
- ; d1 = width*height/8, i.e. the size of one bitplane in bytes (if generic is defined)
-
- ; Fixed-size build only: frame geometry is known at assembly time.
- ; NOTE(review): width, height and depth are not defined in this file;
- ; presumably they are supplied by the build -- confirm.
- ifnd generic
- pixels equ width*height
- plsiz equ width*height/8
- endc
-
-
- section code,code
-
- ; Pick the entry-point label from the build configuration.
- ; generic build: _c2p_8, _c2p_6 or _c2p_4 according to depth.
- ; fixed build: _c2p320x200x8 or _c2p320x200x6 (emitted only when width
- ; is 320); any depth other than 8 or 6 stops assembly.
- ; NOTE(review): the double colon presumably exports the label -- confirm
- ; against the Macro68 documentation.
- ifd generic
- ifeq depth-8
- _c2p_8::
- else
- ifeq depth-6
- _c2p_6::
- else
- ifeq depth-4
- _c2p_4::
- endc
- endc
- endc
- else
- ifeq depth-8
- ifeq width-320
- _c2p320x200x8::
- endc
- else
- ifeq depth-6
- ifeq width-320
- _c2p320x200x6::
- endc
- else
- die "Unrecognised resolution"
- endc
- endc
- endc
-
- ; Entry: preserve d2-d7/a2-a6; they are restored at alldone.
- movem.l d2-d7/a2-a6,-(sp)
-
- ; Stash the caller's arguments in the variable block that is addressed
- ; relative to mybltnode. a2 keeps pointing at mybltnode until the CPU
- ; loop set-up reuses it.
-
- movea.l #mybltnode,a2
- ; library bases
- move.l d0,(utilitybase-mybltnode,a2)
- move.l a6,(gfxbase-mybltnode,a2)
- move.l a5,(timerbase-mybltnode,a2)
- ; temporary chip buffers
- move.l a4,(buff1-mybltnode,a2)
- move.l a3,(buff0-mybltnode,a2)
- ; destination planes and source pixels
- move.l a1,(plane0-mybltnode,a2)
- move.l a0,(chunky-mybltnode,a2)
-
- ; if different size then initialise v_plsiz, v_plsiz_depth and split location
- ; Generic build: d1 (plane size in bytes) is compared with the size
- ; remembered from the previous call. When it differs, the split is reset
- ; so that the blitter gets half of the frame:
- ;   n_blit      = d1/8
- ;   plsiz_blit  = n_blit*4
- ;   pixels_blit = plsiz_blit*8
- ifd generic
- cmp.l (v_plsiz-mybltnode,a2),d1
- beq.b skip_relocate
- move.l d1,(v_plsiz-mybltnode,a2)
-
- move.l d1,d0
- lsr.l #3,d0
- move.l d0,(n_blit-mybltnode,a2)
- lsl.l #2,d0
- move.l d0,(plsiz_blit-mybltnode,a2)
- lsl.l #3,d0
- move.l d0,(pixels_blit-mybltnode,a2)
-
- ; v_plsiz_depth = (depth-1) * plane size; d1 still holds the plane size
- moveq #depth-1,d0
- movea.l (utilitybase-mybltnode,a2),a6
- jsr (_LVOUMult32,a6)
- move.l d0,(v_plsiz_depth-mybltnode,a2)
-
- ; Clear the flag so the formula path is used again at the end of this
- ; call. bclr tests the bit before clearing it: if it was already set the
- ; main loop has been relocated before and the relocation is skipped.
- bclr #0,(firsttimeflag-mybltnode,a2)
- bne.b skip_relocate
-
- ; see if this is the first time thru
- else
- tst.b (firsttimeflag-mybltnode,a2)
- bne.b skip_relocate ; branch if not being called 1st time
- endc
-
- ; relocate the mainloop to a quad-longword boundary (for 030/040 cache line)
- ; a4 = address of begincode rounded up to a multiple of 16. The code from
- ; mainloop up to endcode is then copied there one word at a time. The 8
- ; nop words at begincode provide the 16 bytes of slack, so the copy moves
- ; the loop down by at most 16 bytes (source and destination overlap, the
- ; ascending copy is what makes that safe).
-
- lea (begincode,pc),a4
- adda.w #15,a4
- move.l a4,d0
- and.w #~15,d0
- movea.l d0,a4
- lea (mainloop,pc),a3
- move.w #(endcode-mainloop)/2-1,d0 ; word count minus one for dbra
- 1$: move.w (a3)+,(a4)+
- dbra d0,1$
-
- ; flush the caches
- ; (code has just been rewritten; a6 is loaded from absolute address 4)
-
- movea.l (4).w,a6
- jsr (_LVOCacheClearU,a6)
-
- skip_relocate:
-
- ; see if chunky data are in CHIP ram
- ; "source" is what the first blitter pass reads. It is set to the chunky
- ; pointer here and replaced by buff0 below when a copy is needed.
-
- movea.l (chunky-mybltnode,a2),a1
- move.l a1,(source-mybltnode,a2)
- movea.l (4).w,a6
- jsr (_LVOTypeOfMem,a6)
- and.w #MEMF_CHIP,d0
- bne.b readstart ; branch if already in CHIP
-
- ; copy pixels_blit from chunky to buff0 (from FAST to CHIP) for the blitter
- ; Only the blitter's share (the first pixels_blit bytes) is copied; the
- ; CPU loop reads its share directly from the chunky buffer.
-
- movea.l (chunky-mybltnode,a2),a0
- movea.l (buff0-mybltnode,a2),a1
- move.l a1,(source-mybltnode,a2)
- move.l (pixels_blit-mybltnode,a2),d0
- jsr (_LVOCopyMemQuick,a6)
-
- ; read the start time
- ; The value is written to starttime; both elapsed times computed at the
- ; end of the routine are measured from it.
-
- readstart: lea (starttime-mybltnode,a2),a0
- movea.l (timerbase-mybltnode,a2),a6
- jsr (_LVOReadEClock,a6)
-
- ; start the blitter in the background
- ; waitflag is set to $ff here and cleared by qblitcleanup; the main
- ; routine polls it in waitloop. a1 = mybltnode is the node handed to QBlit.
-
- st (waitflag-mybltnode,a2)
- movea.l a2,a1
- movea.l (gfxbase-mybltnode,a2),a6
- jsr (_LVOQBlit,a6)
-
- ; compute starting parameters for the CPU routine
- ;
- ; Stack layout built here and read by the b0 macro / the loop tails:
- ;   (sp)   word  outer loop counter = (plane size - plsiz_blit)/4
- ;   (2,sp) long  v_plsiz_depth   (generic build only)
- ;   (6,sp) long  v_plsiz         (generic build only)
- ; a0 -> first chunky pixel of the CPU's share
- ; a1 -> matching position in the bitplanes
- ; a3 -> buffers (staging area for one longword per plane)
-
- ifd generic
- move.l (v_plsiz-mybltnode,a2),d0
- move.l d0,-(sp)
- move.l (v_plsiz_depth-mybltnode,a2),-(sp)
- else
- move.l #plsiz,d0
- endc
- sub.l (plsiz_blit-mybltnode,a2),d0
- lsr.l #2,d0
- move.w d0,-(sp) ; outer loop counter on stack
-
- move.l (chunky-mybltnode,a2),a0
- adda.l (pixels_blit-mybltnode,a2),a0 ; offset into chunky
-
- move.l (plane0-mybltnode,a2),a1
- adda.l (plsiz_blit-mybltnode,a2),a1 ; offset into plane
-
- lea (buffers-mybltnode,a2),a3 ; a3 -> buffers
-
- ; Fixed-size build: pre-bias a1 so that every plane can be reached with
- ; a 16-bit displacement. These conditions must select the same case as
- ; the conditions around the b0..b8 macro sets further down. The second
- ; test used to be "2*plsiz-4-32768", which disagreed with the macro set
- ; ("2*plsiz-32768") for 2*plsiz = 32768..32771: a1 was moved to plane 1
- ; while the macros chosen for that size expect a1 in plane 0.
- ifnd generic
- iflt 4*plsiz-4-32768
- adda.w #3*plsiz,a1 ; a1 -> plane 3
- else
- iflt 2*plsiz-32768
- adda.w #1*plsiz,a1 ; a1 -> plane 1
- endc
- endc
- endc
-
- ; Bit masks kept in registers for the whole conversion loop.
-
- move.l #$3333cccc,d7 ; d7 = constant $3333cccc
- move.l #$55555555,d6 ; d6 = constant $55555555
- move.l #$0f0f0f0f,d5 ; d5 = constant $0f0f0f0f
-
- ; Staging-buffer pointers: each register addresses a pair of longwords
- ; (even plane first, odd plane 4 bytes later), 8 bytes apart from a3.
-
- lea (8,a3),a4 ; a4 -> plane2buf
- lea (16,a3),a5 ; a5 -> plane4buf
- lea (24,a3),a6 ; a6 -> plane6buf
-
- ; a2 no longer points at mybltnode from here on: it marks the value a3
- ; reaches after four passes of the inner loop.
-
- lea (4,a3),a2 ; used for inner loop end test
-
- ; begincode: 8 nop words (16 bytes) of padding. On the first call the
- ; code between mainloop and endcode is copied down into this area so
- ; that it starts on a 16-byte boundary; execution falls through the
- ; remaining nops into the loop.
- begincode: rept 8 ; space for mainloop code relocation
- nop
- endr
-
- ; main loop (starts here) processes 8 chunky pixels at a time
- ;
- ; Registers on entry:
- ;   a0 -> chunky pixels (post-incremented, 8 bytes per pass)
- ;   a3/a4/a5/a6 -> staging buffers for planes 0-1 / 2-3 / 4-5 / 6-7
- ;   a2 = end value of a3 (buffers+4)
- ;   d5/d6/d7 = masks $0f0f0f0f / $55555555 / $3333cccc
- ; Each pass stores one byte per plane in the staging buffers. After four
- ; passes (a3 == a2) every staging longword is full and control falls out
- ; of the loop into the code that writes the longwords to the bitplanes.
- ; The numbers in the comment column are the author's cycle counts.
- ; restart / restart2 are the re-entry points used by the loop tails,
- ; which have already executed the instructions above the label for the
- ; next group of 8 pixels.
-
- mainloop: move.l (a0)+,d2 ; 12 get next 4 chunky pixels in d2
- move.l (a0)+,d3 ; 12 get next 4 chunky pixels in d3
- ifgt depth-4
- ; split off the high nibbles of all 8 pixels into d0 (depth 6 and 8 only)
- move.l d2,d0 ; 4
- and.l d5,d2 ; 8 d5=$0f0f0f0f
- move.l d3,d1 ; 4
- and.l d5,d3 ; 8 d5=$0f0f0f0f
- eor.l d2,d0 ; 8
- eor.l d3,d1 ; 8
- lsr.l #4,d1 ; 16
- or.l d1,d0 ; 8
- endc
- ; combine the low nibbles of all 8 pixels in d2, then swap 2-bit groups
- lsl.l #4,d2 ; 16
- or.l d3,d2 ; 8
- move.l d2,d3 ; 4
- and.l d7,d3 ; 8 d7=$3333cccc
- eor.l d3,d2 ; 8
- lsr.w #2,d3 ; 10
- swap d3 ; 4
- lsl.w #2,d3 ; 10
- or.l d2,d3 ; 8
- ifgt depth-4
- ; same 2-bit swap for the high nibbles held in d0
- move.l d0,d1 ; 4
- and.l d7,d1 ; 8 d7=$3333cccc
- eor.l d1,d0 ; 8
- lsr.w #2,d1 ; 10
- swap d1 ; 4
- lsl.w #2,d1 ; 10
- or.l d0,d1 ; 8
- move.l d1,d2 ; 4
- lsr.l #7,d2 ; 22
- move.l d1,d0 ; 4
- and.l d6,d0 ; 8 d6=$55555555
- endc
- ifgt depth-4
- eor.l d0,d1 ; 8
- move.l d2,d4 ; 4
- and.l d6,d4 ; 8 d6=$55555555
- eor.l d4,d2 ; 8
- restart: or.l d4,d1 ; 8
- lsr.l #1,d1 ; 10
- move.b d1,(4,a5) ; 12 plane 5
- ifgt depth-6
- swap d1 ; 4
- move.b d1,(4,a6) ; 12 plane 7
- endc
- or.l d0,d2 ; 8
- move.b d2,(a5)+ ; 8 plane 4
- ifgt depth-6
- swap d2 ; 4
- move.b d2,(a6)+ ; 8 plane 6
- endc
- endc
- ; final 1-bit merge for the low nibbles: produces the bytes for planes 0-3
- move.l d3,d2 ; 4
- lsr.l #7,d2 ; 22
- move.l d3,d0 ; 4
- and.l d6,d0 ; 8 d6=$55555555
- eor.l d0,d3 ; 8
- move.l d2,d4 ; 4
- and.l d6,d4 ; 8 d6=$55555555
- eor.l d4,d2 ; 8
- or.l d4,d3 ; 8
- lsr.l #1,d3 ; 10
- restart2: move.b d3,(4,a3) ; 12 plane 1
- swap d3 ; 4
- move.b d3,(4,a4) ; 12 plane 3
- or.l d0,d2 ; 8
- move.b d2,(a3)+ ; 8 plane 0
- swap d2 ; 4
- move.b d2,(a4)+ ; 8 plane 2
-
- ; loop until four bytes per plane have been staged
- cmpa.l a3,a2 ;128 6
- bne.w mainloop ;130 10 total=512 (64.0 cycles/pixel)
-
- ; move stack buffers to bitplanes (longword writes) and restore ptrs
- ; Intersperse some instructions for the next loop between writes to do something
- ; useful while waiting for the Chip bus.
- ; Do this by defining macros b0..b8 which are then interleaved with some
- ; instructions for next loop.
- ; Check if finished, go back for more.
- ;
- ; Generic build: plane size is only known at run time, so a1 is stepped
- ; from plane to plane with the values kept on the stack.
- ;   b0      d4 = v_plsiz, a1 += v_plsiz_depth (a1 -> highest plane)
- ;   b1..b7  write one staged longword, then step a1 down one plane
- ;   b8      write plane 0 and advance a1 by 4 for the next group
- ; The -(aN) forms also step a3..a6 back to the start of their staging
- ; pairs, ready for the next four passes of the inner loop.
-
- ifd generic
- b0 macro
- move.l (6,sp),d4 ; (v_plsiz) a1 points at plane 0
- adda.l (2,sp),a1 ; (v_plsiz_depth)
- endm
- b1 macro
- move.l (a6),(a1) ; plane 7
- suba.l d4,a1 ;
- endm
- b2 macro
- move.l -(a6),(a1) ; plane 6
- suba.l d4,a1 ;
- endm
- b3 macro
- move.l (a5),(a1) ; plane 5
- suba.l d4,a1 ;
- endm
- b4 macro
- move.l -(a5),(a1) ; plane 4
- suba.l d4,a1 ;
- endm
- b5 macro
- move.l (a4),(a1) ; plane 3
- suba.l d4,a1 ;
- endm
- b6 macro
- move.l -(a4),(a1) ; plane 2
- suba.l d4,a1 ;
- endm
- b7 macro
- move.l (a3),(a1) ; plane 1
- suba.l d4,a1 ;
- endm
- b8 macro
- move.l -(a3),(a1)+ ; plane 0
- endm
- endc
-
- ; Fixed-size build, case 1: 4*plsiz-4 fits in a signed 16-bit
- ; displacement. a1 was biased to plane 3 during set-up, so every plane
- ; is reached with a single (d16,a1) store. b0 writes plane 3 first and
- ; post-increments a1, which is why the other displacements carry "-4".
- ; b1 is intentionally empty in this case.
- ifnd generic
- iflt 4*plsiz-4-32768 ; a1 points into plane 3
- b0 macro
- move.l (a4),(a1)+ ; plane 3
- endm
- b1 macro
- endm
- b2 macro
- move.l (a6),(4*plsiz-4,a1) ; plane 7
- endm
- b3 macro
- move.l -(a6),(3*plsiz-4,a1) ; plane 6
- endm
- b4 macro
- move.l (a5),(2*plsiz-4,a1) ; plane 5
- endm
- b5 macro
- move.l -(a5),(1*plsiz-4,a1) ; plane 4
- endm
- b6 macro
- move.l -(a4),(-1*plsiz-4,a1) ; plane 2
- endm
- b7 macro
- move.l (a3),(-2*plsiz-4,a1) ; plane 1
- endm
- b8 macro
- move.l -(a3),(-3*plsiz-4,a1) ; plane 0
- endm
- endc
- endc
-
- ; Fixed-size build, case 2: case 1 does not fit but 2*plsiz does.
- ; a1 rests in plane 1. For depths above 4, b0 moves it up by 4 planes
- ; (to plane 5) so planes 4..7 are within displacement range; b4 moves it
- ; back to plane 1 for planes 0..3. b7 post-increments a1 for the next
- ; group, hence the "-4" in b8.
- ifnd generic
- ifge 4*plsiz-4-32768
- iflt 2*plsiz-32768 ; a1 points into plane 1
- b0 macro
- ifgt depth-4
- adda.l #4*plsiz,a1
- endc
- endm
- b1 macro
- move.l (a6),(2*plsiz,a1) ; plane 7
- endm
- b2 macro
- move.l -(a6),(1*plsiz,a1) ; plane 6
- endm
- b3 macro
- move.l (a5),(0*plsiz,a1) ; plane 5
- endm
- b4 macro
- move.l -(a5),(-1*plsiz,a1) ; plane 4
- suba.l #4*plsiz,a1
- endm
- b5 macro
- move.l (a4),(2*plsiz,a1) ; plane 3
- endm
- b6 macro
- move.l -(a4),(1*plsiz,a1) ; plane 2
- endm
- b7 macro
- move.l (a3),(a1)+ ; plane 1
- endm
- b8 macro
- move.l -(a3),(-1*plsiz-4,a1) ; plane 0
- endm
- endc
- endc
- endc
-
- ; Fixed-size build, case 3: 2*plsiz no longer fits in a 16-bit
- ; displacement but plsiz does. a1 rests in plane 0 and is moved in
- ; groups of three planes:
- ;   b0      a1 -> plane 6 (depth 6/8) or plane 3 (depth 4)
- ;   b1..b3  planes 7,6,5 relative to plane 6, then a1 -> plane 3
- ;   b4..b6  planes 4,3,2 relative to plane 3, then a1 -> plane 0
- ;   b7,b8   planes 1,0; b8 post-increments a1 for the next group
- ; The depth test in b0 used to read "ifgt plsiz-5", which is always true
- ; and would have sent a depth-4 build to plane 6 instead of plane 3. It
- ; now tests depth, as the equivalent b0 of the previous case does.
- ifnd generic
- ifge 2*plsiz-32768
- iflt plsiz-32768 ; a1 points into plane 0
- b0 macro
- ifgt depth-4
- adda.l #6*plsiz,a1
- else
- adda.l #3*plsiz,a1
- endc
- endm
- b1 macro
- move.l (a6),(plsiz,a1) ; plane 7
- endm
- b2 macro
- move.l -(a6),(a1) ; plane 6
- endm
- b3 macro
- move.l (a5),(-plsiz,a1) ; plane 5
- suba.l #3*plsiz,a1
- endm
- b4 macro
- move.l -(a5),(plsiz,a1) ; plane 4
- endm
- b5 macro
- move.l (a4),(a1) ; plane 3
- endm
- b6 macro
- move.l -(a4),(-plsiz,a1) ; plane 2
- suba.l #3*plsiz,a1
- endm
- b7 macro
- move.l (a3),(plsiz,a1) ; plane 1
- endm
- b8 macro
- move.l -(a3),(a1)+ ; plane 0
- endm
- endc
- endc
- endc
-
- ; Fixed-size build, case 4: plsiz itself does not fit in a 16-bit
- ; displacement. Same scheme as the generic macros, with the plane size
- ; as an assembly-time constant: b0 loads d4 = plsiz and moves a1 to the
- ; highest plane, b1..b7 store and step down one plane, b8 stores plane 0
- ; and advances a1 by 4.
- ifnd generic
- ifge plsiz-32768 ; a1 points into plane 0
- b0 macro
- move.l #plsiz,d4
- adda.l #(depth-1)*plsiz,a1
- endm
- b1 macro
- move.l (a6),(a1) ; plane 7
- suba.l d4,a1
- endm
- b2 macro
- move.l -(a6),(a1) ; plane 6
- suba.l d4,a1
- endm
- b3 macro
- move.l (a5),(a1) ; plane 5
- suba.l d4,a1
- endm
- b4 macro
- move.l -(a5),(a1) ; plane 4
- suba.l d4,a1
- endm
- b5 macro
- move.l (a4),(a1) ; plane 3
- suba.l d4,a1
- endm
- b6 macro
- move.l -(a4),(a1) ; plane 2
- suba.l d4,a1
- endm
- b7 macro
- move.l (a3),(a1) ; plane 1
- suba.l d4,a1
- endm
- b8 macro
- move.l -(a3),(a1)+ ; plane 0
- endm
- endc
- endc
-
- ; Now use the macros b0..b8 interleaved in instructions for next loop
- ;
- ; One tail is assembled, chosen by depth. Each tail writes the staged
- ; longwords to the bitplanes (b-macros) and, between those writes, runs
- ; the first part of the conversion for the next 8 pixels -- the same
- ; instruction sequence as at the top of mainloop. It then decrements the
- ; word counter on top of the stack and re-enters the loop at restart
- ; (depth 6/8) or restart2 (depth 4), i.e. just after the instructions
- ; already executed here. Note that the next 8 pixels are fetched before
- ; the counter is tested.
-
- ; 8 planes
- ifeq depth-8
- b0
- b1 ; plane 7
- move.l (a0)+,d2 ; 12 get next 4 chunky pixels in d2
- move.l (a0)+,d3 ; 12 get next 4 chunky pixels in d3
- move.l d2,d0 ; 4
- and.l d5,d2 ; 8 d5=$0f0f0f0f
- b2 ; plane 6
- move.l d3,d1 ; 4
- and.l d5,d3 ; 8 d5=$0f0f0f0f
- eor.l d2,d0 ; 8
- eor.l d3,d1 ; 8
- lsr.l #4,d1 ; 16
- b3 ; plane 5
- or.l d1,d0 ; 8
- lsl.l #4,d2 ; 16
- or.l d3,d2 ; 8
- move.l d2,d3 ; 4
- and.l d7,d3 ; 8 d7=$3333cccc
- b4 ; plane 4
- eor.l d3,d2 ; 8
- lsr.w #2,d3 ; 10
- swap d3 ; 4
- lsl.w #2,d3 ; 10
- or.l d2,d3 ; 8
- b5 ; plane 3
- move.l d0,d1 ; 4
- and.l d7,d1 ; 8 d7=$3333cccc
- eor.l d1,d0 ; 8
- lsr.w #2,d1 ; 10
- swap d1 ; 4
- b6 ; plane 2
- lsl.w #2,d1 ; 10
- or.l d0,d1 ; 8
- move.l d1,d2 ; 4
- lsr.l #7,d2 ; 22
- move.l d1,d0 ; 4
- b7 ; plane 1
- and.l d6,d0 ; 8 d6=$55555555
- eor.l d0,d1 ; 8
- move.l d2,d4 ; 4
- and.l d6,d4 ; 8 d6=$55555555
- eor.l d4,d2 ; 8
- b8 ; plane 0
- sub.w #1,(sp) ; outer loop counter
- bne.w restart
- else
- ; 6 planes: b1/b2 (planes 7 and 6) are not used
- ifeq depth-6
- b0
- b3 ; plane 5
- move.l (a0)+,d2 ; 12 get next 4 chunky pixels in d2
- move.l (a0)+,d3 ; 12 get next 4 chunky pixels in d3
- move.l d2,d0 ; 4
- and.l d5,d2 ; 8 d5=$0f0f0f0f
- move.l d3,d1 ; 4
- and.l d5,d3 ; 8 d5=$0f0f0f0f
- b4 ; plane 4
- eor.l d2,d0 ; 8
- eor.l d3,d1 ; 8
- lsr.l #4,d1 ; 16
- or.l d1,d0 ; 8
- lsl.l #4,d2 ; 16
- or.l d3,d2 ; 8
- b5 ; plane 3
- move.l d2,d3 ; 4
- and.l d7,d3 ; 8 d7=$3333cccc
- eor.l d3,d2 ; 8
- lsr.w #2,d3 ; 10
- swap d3 ; 4
- lsl.w #2,d3 ; 10
- b6 ; plane 2
- or.l d2,d3 ; 8
- move.l d0,d1 ; 4
- and.l d7,d1 ; 8 d7=$3333cccc
- eor.l d1,d0 ; 8
- lsr.w #2,d1 ; 10
- swap d1 ; 4
- lsl.w #2,d1 ; 10
- or.l d0,d1 ; 8
- move.l d1,d2 ; 4
- b7 ; plane 1
- lsr.l #7,d2 ; 22
- move.l d1,d0 ; 4
- and.l d6,d0 ; 8 d6=$55555555
- eor.l d0,d1 ; 8
- move.l d2,d4 ; 4
- and.l d6,d4 ; 8 d6=$55555555
- eor.l d4,d2 ; 8
- b8 ; plane 0
- sub.w #1,(sp) ; outer loop counter
- bne.w restart
- else
- ; 4 planes: only b5..b8 are used; d1 serves as the scratch register
- ; here and the loop is re-entered at restart2
- ifeq depth-4
- b0
- b5 ; plane 3
- move.l (a0)+,d2 ; 12 get next 4 chunky pixels in d2
- move.l (a0)+,d3 ; 12 get next 4 chunky pixels in d3
- lsl.l #4,d2 ; 16
- or.l d3,d2 ; 8
- move.l d2,d3 ; 4
- and.l d7,d3 ; 8 d7=$3333cccc
- eor.l d3,d2 ; 8
- b6 ; plane 2
- lsr.w #2,d3 ; 10
- swap d3 ; 4
- lsl.w #2,d3 ; 10
- or.l d2,d3 ; 8
- move.l d3,d2 ; 4
- lsr.l #7,d2 ; 22
- move.l d3,d0 ; 4
- b7 ; plane 1
- and.l d6,d0 ; 8 d6=$55555555
- eor.l d0,d3 ; 8
- move.l d2,d1 ; 4
- and.l d6,d1 ; 8 d6=$55555555
- eor.l d1,d2 ; 8
- or.l d1,d3 ; 8
- lsr.l #1,d3 ; 10
- b8 ; plane 0
- sub.w #1,(sp) ; outer loop counter
- bne.w restart2
- else
- die "Unsupported depth"
- endc
- endc
- endc
-
- ; This jmp is part of the block that gets copied to the aligned address,
- ; so it targets endcode by absolute address rather than falling through.
- jmp (endcode) ; break out of relocated code
- endcode:
-
- ; CPU all done! restore stack
- ; generic build: counter word + two longwords = 10 bytes
-
- ifd generic
- add.w #10,sp ; remove stack vars
- else
- addq.l #2,sp ; remove outer loop counter
- endc
-
- ; find out how long it took
- ; a2 was reused by the loop, so the variables are addressed relative to
- ; a3, which the loop tail leaves pointing at buffers again.
-
- lea (endcputime-buffers,a3),a0
- movea.l (timerbase-buffers,a3),a6 ; timerbase
- jsr (_LVOReadEClock,a6)
-
- ; wait for the blitter to finish
- ; busy-wait (for a very short time) on FAST bus, even on a CHIP-only machine
- ; waitflag is cleared by qblitcleanup; WaitBlit is only called while
- ; the flag is still set.
-
- movea.l (gfxbase-buffers,a3),a6
- bra.b endwaitloop
- waitloop: jsr (_LVOWaitBlit,a6)
- endwaitloop: tst.b (waitflag-buffers,a3)
- bne.b waitloop
-
- ; Re-balance the split between blitter and CPU for the next call, then
- ; return. On entry a3 -> buffers. On exit n_blit, plsiz_blit and
- ; pixels_blit hold the new split, or are left untouched when no valid
- ; new split could be computed.
-
- ; get blittime,cputime,n_blit in d2,d3,d0
- ; (only the low longword of each EClock reading is used)
-
- move.l (endblittime+4-buffers,a3),d2
- sub.l (starttime+4-buffers,a3),d2
-
- move.l (endcputime+4-buffers,a3),d3
- sub.l (starttime+4-buffers,a3),d3
-
- move.l (n_blit-buffers,a3),d0
-
- ; branch if this is not the first time through
- ; (bset tests the flag before setting it)
-
- bset #0,(firsttimeflag-buffers,a3)
- bne.b simple
-
- ; calculate new partition point for next call using formula
- ; n_blit = n * (t_cpu * n_blit / (t_blit * n_cpu + t_cpu * n_blit))
- ; d0 = plsiz/4 * d3 * d0 / (d2 * (plsiz/4 - d0) + d3 * d0)
-
- movea.l (utilitybase-buffers,a3),a6
-
- moveq #10,d4
- lsr.l d4,d2 ; scale t_blit (avoid overflow)
- lsr.l d4,d3 ; scale t_cpu
-
- move.l d0,d4 ; d4 = n_blit
- move.l d3,d1
- jsr (_LVOUMult32,a6)
- move.l d0,d3 ; d3 = t_cpu * n_blit
-
- ifd generic
- move.l (v_plsiz-buffers,a3),d1
- lsr.l #2,d1
- else
- move.l #plsiz/4,d1
- endc
- jsr (_LVOUMult32,a6)
- move.l d0,d5 ; d5 = n * t_cpu * n_blit (numerator)
-
- ifd generic
- move.l (v_plsiz-buffers,a3),d0
- lsr.l #2,d0
- else
- move.l #plsiz/4,d0
- endc
- sub.l d4,d0 ; d0 = n_cpu
- move.l d2,d1
- jsr (_LVOUMult32,a6)
- add.l d0,d3 ; d3 = t_blit * n_cpu + t_cpu * n_blit
- ; Both scaled times can be zero for a very small frame. Dividing by a
- ; zero denominator must be avoided, so keep the current split instead.
- beq.b alldone
-
- move.l d5,d0
- move.l d3,d1
- jsr (_LVOUDivMod32,a6)
-
- ; Only accept 0 < n_blit < n, the same range the simple adjustment
- ; enforces. n_blit = 0 would start zero-sized blits, and n_blit = n
- ; would leave the CPU loop with a counter of zero, which wraps around.
- tst.l d0
- beq.b alldone
- ifd generic
- move.l (v_plsiz-buffers,a3),d1
- lsr.l #2,d1
- cmp.l d1,d0
- else
- cmp.l #plsiz/4,d0
- endc
- bcs.b done
- bra.b alldone ; don't go out of range
-
- ; simple-minded adjustment
- ; move the split by 8 units towards whichever side finished first
-
- simple: sub.l d3,d2 ; blittime-cputime
- beq.b alldone ; can't do better than this
- bgt.b 1$
- ; blittime < cputime, increase n_blit
- addq.l #8,d0
- ifd generic
- move.l (v_plsiz-buffers,a3),d1
- lsr.l #2,d1
- cmp.l d1,d0
- else
- cmp.l #plsiz/4,d0
- endc
- bcs.b done
- bra.b alldone ; don't go out of range
- ; blittime > cputime, decrease n_blit
- 1$: subq.l #8,d0
- bhi.b done
- bra.b alldone ; don't go out of range
-
- ; save the new partition point
- ; plsiz_blit = n_blit*4, pixels_blit = plsiz_blit*8
-
- done: move.l d0,(n_blit-buffers,a3)
- lsl.l #2,d0
- move.l d0,(plsiz_blit-buffers,a3)
- lsl.l #3,d0
- move.l d0,(pixels_blit-buffers,a3)
-
- ; all done!
-
- alldone: movem.l (sp)+,d2-d7/a2-a6
- rts
-
- ;-----------------------------------------------------------------------------
- ; QBlit functions (called asynchronously)
- ;
- ; All of these use a0 as the base for the blitter register offsets and
- ; a1 as the pointer to mybltnode. Each one programs a single blit and
- ; then stores the address of the next function in qblitfunc, so the
- ; passes run as a chain blit11 -> blit12 -> blit21 ... -> blit48. Only
- ; blit48 ends with an explicit "set Z flag"; the others return with the
- ; flags left by their final move.l of a code address.
- ; NOTE(review): presumably QBlit keeps calling the function until it
- ; returns with Z set -- confirm against the graphics.library autodocs.
- ;
- ; blit11: pass 1, ascending. A = source, B = source+8, both with modulo
- ; 8; D = buff1 with modulo 0; 4 words wide, pixels_blit/16 rows.
-
- blit11: moveq #-1,d0
- move.l d0,(bltafwm,a0)
- move.l #(8<<16)+8,(bltbmod,a0) ; also loads bltamod
- move.w #0,(bltdmod,a0)
- move.l (source-mybltnode,a1),d0
- move.l d0,(bltapt,a0) ; source
- addq.l #8,d0
- move.l d0,(bltbpt,a0) ; source+8
- move.w #%1111111100000000,(bltcdat,a0)
- move.l (buff1-mybltnode,a1),(bltdpt,a0) ; buff1
- move.l #$0DE48000,(bltcon0,a0) ; D=AC+(B>>8)~C
- move.l (pixels_blit-mybltnode,a1),d0
- lsr.l #4,d0
- move.w d0,(bltsizv,a0) ; pixels_blit/16
- move.w #4,(bltsizh,a0) ; do blit
- lea (blit12,pc),a0
- move.l a0,(qblitfunc-mybltnode,a1)
- rts
-
- ; blit12: pass 1, descending counterpart of blit11. The pointers start
- ; at the end of the blitter's share and D ends at buff1+pixels_blit-2.
- ; bltsizv and the modulos are not written here; the values written by
- ; blit11 are relied upon.
- blit12: move.l (source-mybltnode,a1),d0
- add.l (pixels_blit-mybltnode,a1),d0
- sub.l #8+2,d0
- move.l d0,(bltapt,a0) ; source+pixels_blit-8-2
- addq.l #8,d0
- move.l d0,(bltbpt,a0) ; source+pixels_blit-2
- sub.l (source-mybltnode,a1),d0
- add.l (buff1-mybltnode,a1),d0
- move.l d0,(bltdpt,a0) ; buff1+pixels_blit-2
- move.l #$8DE40002,(bltcon0,a0) ; D=(A<<8)C+B~C, desc.
- move.w #4,(bltsizh,a0) ; do blit
- lea (blit21,pc),a0
- move.l a0,(qblitfunc-mybltnode,a1)
- rts
-
- ; blit21: pass 2, ascending. A = buff1, B = buff1+4, both with modulo 4;
- ; D = buff0; 2 words wide, pixels_blit/8 rows.
- ; In:  a0 = base for the blitter register offsets, a1 -> mybltnode
- ; Out: qblitfunc = next function to run (blit21b or blit22)
- ;
- ; When the row count exceeds 32768 the blit is issued in chunks:
- ; blit21a starts a 32768-row chunk and saves the total in tmp_ptr,
- ; blit21b subtracts the rows already done and loops back, and blit21c
- ; issues the final chunk. The pointers are not reloaded between chunks.
- ; Every chunk must use the same width as the final one (2 words). The
- ; oversize chunk used to be started with a width of 1, which does not
- ; match the modulo of 4 programmed for this pass.
- blit21: move.l #(4<<16)+4,(bltbmod,a0) ; also load bltamod
- move.l (buff1-mybltnode,a1),d1
- move.l d1,(bltapt,a0) ; buff1
- addq.l #4,d1
- move.l d1,(bltbpt,a0) ; buff1+4
- move.w #%1111000011110000,(bltcdat,a0)
- move.l (buff0-mybltnode,a1),(bltdpt,a0) ; buff0
- move.l #$0DE44000,(bltcon0,a0) ; D=AC+(B>>4)~C
- move.l (pixels_blit-mybltnode,a1),d0
- lsr.l #3,d0 ; bltsizv = pixels_blit/8
- blit21a: cmp.l #32768,d0 ; check for overflow blitter
- bls.b blit21c ; branch if ok
- move.l d0,(tmp_ptr-mybltnode,a1) ; else save (too big) bltsizv
- move.w #32768,(bltsizv,a0) ; max possible bltsizv
- move.w #2,(bltsizh,a0) ; do blit (same width as blit21c)
- lea (blit21b,pc),a0
- move.l a0,(qblitfunc-mybltnode,a1)
- rts
-
- blit21b: move.l (tmp_ptr-mybltnode,a1),d0 ; restore (too big) bltsizv
- sub.l #32768,d0 ; subtract number already done
- bra.b blit21a ; loop back
-
- blit21c: move.w d0,(bltsizv,a0) ; pixels_blit/8
- move.w #2,(bltsizh,a0) ; do blit
- lea (blit22,pc),a0
- move.l a0,(qblitfunc-mybltnode,a1)
- rts
-
- ; blit22: pass 2, descending counterpart of blit21. The pointers start
- ; at the end of the blitter's share; D ends at buff0+pixels_blit-2.
- ; 2 words wide, pixels_blit/8 rows; the modulos written by blit21 are
- ; relied upon.
- ; In:  a0 = base for the blitter register offsets, a1 -> mybltnode
- ; Out: qblitfunc = next function to run (blit22b or blit31)
- ;
- ; Row counts above 32768 are split into chunks exactly as in blit21
- ; (blit22a / blit22b / blit22c). The oversize chunk must use the same
- ; width as the final one (2 words); it used to be started with a width
- ; of 1, which does not match the modulo of 4 used by this pass.
- blit22: move.l (buff1-mybltnode,a1),d0
- add.l (pixels_blit-mybltnode,a1),d0
- subq.l #2+4,d0
- move.l d0,(bltapt,a0) ; buff1+pixels_blit-2-4
- addq.l #4,d0
- move.l d0,(bltbpt,a0) ; buff1+pixels_blit-2
- sub.l (buff1-mybltnode,a1),d0
- add.l (buff0-mybltnode,a1),d0
- move.l d0,(bltdpt,a0) ; buff0+pixels_blit-2
- move.l #$4DE40002,(bltcon0,a0) ; D=(A<<4)C+B~C, desc.
- move.l (pixels_blit-mybltnode,a1),d0
- lsr.l #3,d0 ; bltsizv = pixels_blit/8
- blit22a: cmp.l #32768,d0 ; check for overflow blitter
- bls.b blit22c ; branch if ok
- move.l d0,(tmp_ptr-mybltnode,a1) ; else save (too big) bltsizv
- move.w #32768,(bltsizv,a0) ; max possible bltsizv
- move.w #2,(bltsizh,a0) ; do blit (same width as blit22c)
- lea (blit22b,pc),a0
- move.l a0,(qblitfunc-mybltnode,a1)
- rts
-
- blit22b: move.l (tmp_ptr-mybltnode,a1),d0 ; restore (too big) bltsizv
- sub.l #32768,d0 ; subtract number already done
- bra.b blit22a ; loop back
-
- blit22c: move.w d0,(bltsizv,a0) ; pixels_blit/8
- move.w #2,(bltsizh,a0) ; do blit
- lea (blit31,pc),a0
- move.l a0,(qblitfunc-mybltnode,a1)
- rts
-
- ; blit31: pass 3, ascending. A = buff0, B = buff0+2, both with modulo 2;
- ; D = buff1; 1 word wide, pixels_blit/4 rows.
- ; Row counts above 32768 are issued in chunks: blit31a starts a
- ; 32768-row chunk and saves the total in tmp_ptr, blit31b subtracts the
- ; rows already done and loops back, blit31c issues the last chunk and
- ; hands over to blit32.
- blit31: move.l #(2<<16)+2,(bltbmod,a0) ; also load bltamod
- move.l (buff0-mybltnode,a1),d0
- move.l d0,(bltapt,a0) ; buff0
- addq.l #2,d0
- move.l d0,(bltbpt,a0) ; buff0+2
- move.w #%1100110011001100,(bltcdat,a0)
- move.l (buff1-mybltnode,a1),(bltdpt,a0) ; buff1
- move.l #$0DE42000,(bltcon0,a0) ; D=AC+(B>>2)~C
- move.l (pixels_blit-mybltnode,a1),d0
- lsr.l #2,d0 ; bltsizv = pixels_blit/4
- blit31a: cmp.l #32768,d0 ; check for overflow blitter
- bls.b blit31c ; branch if ok
- move.l d0,(tmp_ptr-mybltnode,a1) ; else save (too big) bltsizv
- move.w #32768,(bltsizv,a0) ; max possible bltsizv
- move.w #1,(bltsizh,a0) ; do blit
- lea (blit31b,pc),a0
- move.l a0,(qblitfunc-mybltnode,a1)
- rts
-
- blit31b: move.l (tmp_ptr-mybltnode,a1),d0 ; restore (too big) bltsizv
- sub.l #32768,d0 ; subtract number already done
- bra.b blit31a ; loop back
-
- blit31c: move.w d0,(bltsizv,a0) ; pixels_blit/4
- move.w #1,(bltsizh,a0) ; do last chunk of this blit
- lea (blit32,pc),a0
- move.l a0,(qblitfunc-mybltnode,a1)
- rts
-
- ; blit32: pass 3, descending counterpart of blit31. The pointers start
- ; at the end of the blitter's share; D ends at buff1+pixels_blit-2.
- ; 1 word wide, pixels_blit/4 rows; the modulos written by blit31 are
- ; relied upon. Row counts above 32768 are chunked through blit32a /
- ; blit32b / blit32c in the same way as in blit31.
- blit32: move.l (buff0-mybltnode,a1),d0
- add.l (pixels_blit-mybltnode,a1),d0
- subq.l #2+2,d0
- move.l d0,(bltapt,a0) ; buff0+pixels_blit-2-2
- addq.l #2,d0
- move.l d0,(bltbpt,a0) ; buff0+pixels_blit-2
- sub.l (buff0-mybltnode,a1),d0
- add.l (buff1-mybltnode,a1),d0
- move.l d0,(bltdpt,a0) ; buff1+pixels_blit-2
- move.l #$2DE40002,(bltcon0,a0) ; D=(A<<2)C+B~C, desc.
- move.l (pixels_blit-mybltnode,a1),d0
- lsr.l #2,d0 ; bltsizv = pixels_blit/4
- blit32a: cmp.l #32768,d0 ; check for overflow blitter
- bls.b blit32c ; branch if ok
- move.l d0,(tmp_ptr-mybltnode,a1) ; else save (too big) bltsizv
- move.w #32768,(bltsizv,a0) ; max possible bltsizv
- move.w #1,(bltsizh,a0) ; do blit
- lea (blit32b,pc),a0
- move.l a0,(qblitfunc-mybltnode,a1)
- rts
-
- blit32b: move.l (tmp_ptr-mybltnode,a1),d0 ; restore (too big) bltsizv
- sub.l #32768,d0 ; subtract number already done
- bra.b blit32a ; loop back
-
- blit32c: move.w d0,(bltsizv,a0) ; pixels_blit/4
- move.w #1,(bltsizh,a0) ; do blit
- lea (blit41,pc),a0
- move.l a0,(qblitfunc-mybltnode,a1)
- rts
-
- ; blit41: first blit of pass 4 (ascending), which writes the bitplanes.
- ; A = buff1, B = buff1+plsiz_blit, modulos 0; D = plane 7, i.e.
- ; plane0 + 7*plane size; 1 word wide, pixels_blit/16 rows.
- ; tmp_ptr remembers the B pointer so that blit42..blit48 can derive
- ; their source addresses from it.
- blit41: moveq #0,d0
- move.l d0,(bltbmod,a0) ; also load bltamod
- move.l (buff1-mybltnode,a1),d0
- move.l d0,(bltapt,a0) ; buff1+0*plsiz_blit
- add.l (plsiz_blit-mybltnode,a1),d0
- move.l d0,(bltbpt,a0) ; buff1+1*plsiz_blit
- move.l d0,(tmp_ptr-mybltnode,a1)
- move.w #%1010101010101010,(bltcdat,a0)
- move.l (plane0-mybltnode,a1),d0
- ifd generic
- move.l (v_plsiz-mybltnode,a1),d1
- lsl.l #3,d1
- sub.l (v_plsiz-mybltnode,a1),d1 ; d1 = 7*v_plsiz
- add.l d1,d0
- else
- add.l #7*plsiz,d0
- endc
- move.l d0,(bltdpt,a0) ; Plane7
- move.l (pixels_blit-mybltnode,a1),d0
- lsr.l #4,d0
- move.w d0,(bltsizv,a0) ; pixels_blit/16
- move.l #$0DE41000,(bltcon0,a0) ; D=AC+(B>>1)~C
- move.w #1,(bltsizh,a0) ; do blit
- lea (blit42,pc),a0
- move.l a0,(qblitfunc-mybltnode,a1)
- rts
-
- ; blit42: pass 4, ascending. A/B = the next two plsiz_blit-sized slices
- ; of buff1 after the ones used by blit41; D = plane 3. Control, mask
- ; and bltsizv registers are left as blit41 programmed them.
- blit42: move.l (plsiz_blit-mybltnode,a1),d1
- move.l (tmp_ptr-mybltnode,a1),d0
- add.l d1,d0
- move.l d0,(bltapt,a0) ; buff1+2*plsiz_blit
- add.l d1,d0
- move.l d0,(bltbpt,a0) ; buff1+3*plsiz_blit
- move.l d0,(tmp_ptr-mybltnode,a1)
- move.l (plane0-mybltnode,a1),d0
- ifd generic
- move.l (v_plsiz-mybltnode,a1),d1
- add.l d1,d0
- add.l d1,d0
- add.l d1,d0 ; d0 = plane0 + 3*v_plsiz
- else
- add.l #3*plsiz,d0
- endc
- move.l d0,(bltdpt,a0) ; Plane3
- move.w #1,(bltsizh,a0) ; do blit
- lea (blit43,pc),a0
- move.l a0,(qblitfunc-mybltnode,a1)
- rts
-
- ; blit43: pass 4, ascending. A/B = slices 4 and 5 of buff1; D = plane 5.
- blit43: move.l (plsiz_blit-mybltnode,a1),d1
- move.l (tmp_ptr-mybltnode,a1),d0
- add.l d1,d0
- move.l d0,(bltapt,a0) ; buff1+4*plsiz_blit
- add.l d1,d0
- move.l d0,(bltbpt,a0) ; buff1+5*plsiz_blit
- move.l d0,(tmp_ptr-mybltnode,a1)
- move.l (plane0-mybltnode,a1),d0
- ifd generic
- move.l (v_plsiz-mybltnode,a1),d1
- add.l d1,d0
- lsl.l #2,d1
- add.l d1,d0 ; d0 = plane0 + 5*v_plsiz
- else
- add.l #5*plsiz,d0
- endc
- move.l d0,(bltdpt,a0) ; Plane5
- move.w #1,(bltsizh,a0) ; do blit
- lea (blit44,pc),a0
- move.l a0,(qblitfunc-mybltnode,a1)
- rts
-
- ; blit44: pass 4, last ascending blit. A/B = slices 6 and 7 of buff1;
- ; D = plane 1. tmp_ptr is left at buff1+7*plsiz_blit for blit45.
- blit44: move.l (plsiz_blit-mybltnode,a1),d1
- move.l (tmp_ptr-mybltnode,a1),d0
- add.l d1,d0
- move.l d0,(bltapt,a0) ; buff1+6*plsiz_blit
- add.l d1,d0
- move.l d0,(bltbpt,a0) ; buff1+7*plsiz_blit
- move.l d0,(tmp_ptr-mybltnode,a1)
- move.l (plane0-mybltnode,a1),d0
- ifd generic
- add.l (v_plsiz-mybltnode,a1),d0
- else
- add.l #1*plsiz,d0
- endc
- move.l d0,(bltdpt,a0) ; Plane1
- move.w #1,(bltsizh,a0) ; do blit
- lea (blit45,pc),a0
- move.l a0,(qblitfunc-mybltnode,a1)
- rts
-
- ; blit45: pass 4, first descending blit. The pointers address the last
- ; word of each slice (hence the -2). B = end of slice 7, A = end of
- ; slice 6; D = end of the blitter's share of plane 0. tmp_ptr now
- ; tracks the A pointer, from which blit46..blit48 step downwards.
- blit45: move.l (plsiz_blit-mybltnode,a1),d1
- move.l (tmp_ptr-mybltnode,a1),d0
- add.l d1,d0
- subq.l #2,d0
- move.l d0,(bltbpt,a0) ; buff1+8*plsiz_blit-2
- sub.l d1,d0
- move.l d0,(bltapt,a0) ; buff1+7*plsiz_blit-2
- move.l d0,(tmp_ptr-mybltnode,a1)
- move.l (plane0-mybltnode,a1),d0
- add.l d1,d0
- subq.l #2,d0
- move.l d0,(bltdpt,a0) ; Plane0
- move.l #$1DE40002,(bltcon0,a0) ; D=(A<<1)C+B~C, desc.
- move.w #1,(bltsizh,a0) ; do blit
- lea (blit46,pc),a0
- move.l a0,(qblitfunc-mybltnode,a1)
- rts
-
- ; blit46: pass 4, descending. B = end of slice 5, A = end of slice 4;
- ; D = end of the blitter's share of plane 4.
- blit46: move.l (plsiz_blit-mybltnode,a1),d1
- move.l (tmp_ptr-mybltnode,a1),d0
- sub.l d1,d0
- move.l d0,(bltbpt,a0) ; buff1+6*plsiz_blit-2
- sub.l d1,d0
- move.l d0,(bltapt,a0) ; buff1+5*plsiz_blit-2
- move.l d0,(tmp_ptr-mybltnode,a1)
- move.l (plane0-mybltnode,a1),d0
- add.l d1,d0
- ifd generic
- move.l (v_plsiz-mybltnode,a1),d1
- lsl.l #2,d1
- add.l d1,d0 ; + 4*v_plsiz
- subq.l #2,d0
- else
- add.l #4*plsiz-2,d0
- endc
- move.l d0,(bltdpt,a0) ; Plane4
- move.w #1,(bltsizh,a0) ; do blit
- lea (blit47,pc),a0
- move.l a0,(qblitfunc-mybltnode,a1)
- rts
-
- ; blit47: pass 4, descending. B = end of slice 3, A = end of slice 2;
- ; D = end of the blitter's share of plane 2.
- blit47: move.l (plsiz_blit-mybltnode,a1),d1
- move.l (tmp_ptr-mybltnode,a1),d0
- sub.l d1,d0
- move.l d0,(bltbpt,a0) ; buff1+4*plsiz_blit-2
- sub.l d1,d0
- move.l d0,(bltapt,a0) ; buff1+3*plsiz_blit-2
- move.l d0,(tmp_ptr-mybltnode,a1)
- move.l (plane0-mybltnode,a1),d0
- add.l d1,d0
- ifd generic
- move.l (v_plsiz-mybltnode,a1),d1
- add.l d1,d0
- add.l d1,d0 ; + 2*v_plsiz
- subq.l #2,d0
- else
- add.l #2*plsiz-2,d0
- endc
- move.l d0,(bltdpt,a0) ; Plane2
- move.w #1,(bltsizh,a0) ; do blit
- lea (blit48,pc),a0
- move.l a0,(qblitfunc-mybltnode,a1)
- rts
-
- ; blit48: last blit of the chain (pass 4, descending). B = end of slice
- ; 1, A = end of slice 0; D = end of the blitter's share of plane 6.
- ; qblitfunc is reset to blit11 so that the next QBlit of this node
- ; starts the chain from the beginning, and the function returns with
- ; d0 = 0 / Z set.
- blit48: move.l (plsiz_blit-mybltnode,a1),d1
- move.l (tmp_ptr-mybltnode,a1),d0
- sub.l d1,d0
- move.l d0,(bltbpt,a0) ; buff1+2*plsiz_blit-2
- sub.l d1,d0
- move.l d0,(bltapt,a0) ; buff1+1*plsiz_blit-2
- move.l (plane0-mybltnode,a1),d0
- add.l d1,d0
- ifd generic
- move.l (v_plsiz-mybltnode,a1),d1
- sub.l d1,d0
- sub.l d1,d0
- lsl.l #3,d1
- add.l d1,d0 ; net effect: + 6*v_plsiz
- subq.l #2,d0
- else
- add.l #6*plsiz-2,d0
- endc
- move.l d0,(bltdpt,a0) ; Plane6
- move.w #1,(bltsizh,a0) ; do blit
- lea (blit11,pc),a0
- move.l a0,(qblitfunc-mybltnode,a1)
- moveq #0,d0 ; set Z flag
- rts
-
- ; qblitcleanup: cleanup routine named in mybltnode. Records the time at
- ; which the blitter work ended in endblittime, then clears waitflag so
- ; that the wait loop in the main routine can exit. a2 and a6 are
- ; preserved.
- qblitcleanup: movem.l a2/a6,-(sp)
- movea.l #mybltnode,a2
- movea.l (timerbase-mybltnode,a2),a6 ; a6 = TimerBase for the call
- lea (endblittime-mybltnode,a2),a0 ; a0 -> where the reading is stored
- jsr (_LVOReadEClock,a6) ; may be called from interrupts
- sf (waitflag-mybltnode,a2) ; signal "blitter finished"
- movem.l (sp)+,a2/a6
- rts
-
- ;-----------------------------------------------------------------------------
-
- section data,data
-
- ; buffers: 8 staging longwords, one per plane in the order plane 0..7
- ; (the CPU loop addresses them through a3..a6). mybltnode follows
- ; directly, so the code can reach every variable below from either
- ; label with a fixed displacement.
- ; NOTE(review): "quad" presumably aligns to a quad-longword (16-byte)
- ; boundary -- confirm against the Macro68 documentation.
- quad
- buffers: dc.l 0,0,0,0,0,0,0,0
- ; Blit node passed to QBlit (in a1) by the main routine.
- mybltnode: dc.l 0 ; next bltnode
- qblitfunc: dc.l blit11 ; ptr to qblitfunc()
- dc.b cleanup ; stat
- dc.b 0 ; filler
- dc.w 0 ; blitsize
- dc.w 0 ; beamsync
- dc.l qblitcleanup ; ptr to qblitcleanup()
-
- ; Saved parameters and working variables, addressed relative to
- ; mybltnode (a2 / a1) or to buffers (a3).
- quad
- chunky: dc.l 0 ; ptr to original chunky data
- plane0: dc.l 0 ; ptr to output planes
- buff0: dc.l 0 ; ptr to chip buffer0, size = pixels
- buff1: dc.l 0 ; ptr to chip buffer1, size = pixels
- source: dc.l 0 ; copy of chunky (if chip) else buff0
- ifd generic
- v_plsiz: dc.l 0 ; width*height/8
- v_plsiz_depth: dc.l 0 ; (depth-1)*width*height/8
- pixels_blit: dc.l 0 ; number of pixels handled by blitter
- plsiz_blit: dc.l 0 ; & corresponding (partial) planesize
- n_blit: dc.l 0 ; number of 32-byte units for blitter
- else
- ; fixed-size build: start with the blitter handling half of the frame
- pixels_blit: dc.l pixels/2 ; number of pixels handled by blitter
- plsiz_blit: dc.l plsiz/2 ; & corresponding (partial) planesize
- n_blit: dc.l plsiz/4/2 ; number of 32-byte units for blitter
- endc
- tmp_ptr: dc.l 0 ; scratch used by the QBlit functions between calls
- gfxbase: dc.l 0
- timerbase: dc.l 0
- utilitybase: dc.l 0
- starttime: dc.l 0,0 ; EClock readings; the code uses the second longword
- endblittime: dc.l 0,0
- endcputime: dc.l 0,0
- waitflag: dc.b 0 ; set before QBlit, cleared by qblitcleanup
- firsttimeflag: dc.b 0 ; bit 0 set once a call has completed
-
- ;-----------------------------------------------------------------------------
- ;
- ; section bss,bss,chip ; MUST BE IN CHIP !!!!!
- ;
- ; quad
- ;buff0: ds.b pixels ;Intermediate buffer 1
- ;buff1: ds.b pixels ;Intermediate buffer 1
- ;
- ;-----------------------------------------------------------------------------
-
- end
-